/*****************************
name:		kayla freeman
date:		8/1/24
purpose:	prep running file of supplier loan characteristics from dealscan
******************************/


clear

/* Prelim step 1: build a one-row-per-supplier list of gvkeys from the
   supply-chain pair data. Only the supplier key is kept. */
use 02_supply_chain_links
keep s_gvkey
bysort s_gvkey: keep if _n == 1      /* one row per distinct s_gvkey */
rename s_gvkey gvkey
destring gvkey, replace              /* store the key as numeric */
tempfile sg
save `sg'


/* Prelim step 2: build a gvkey x calendar-month file that records which fiscal
   year(s) each month belongs to. Every gvkey-fyear row is expanded into the 12
   months ending in the month of datadate. The file is merged onto facility
   start and end months further down. */

clear
use 01_comp_basic_raw_vars
keep if indfmt=="INDL"
keep gvkey datadate fyear
drop if missing(fyear)
bysort gvkey fyear: gen dup = cond(_N==1,0,_n)
tab dup /*go ahead and create running months and keep all for each gvkey fyear combo*/

/* 12 rows per gvkey-fyear(-dup): _lag = 0..11 months back from datadate */
expand 12
bysort gvkey fyear dup: gen _lag = _n - 1
/* first day of the month that lies _lag months before the fiscal year-end month */
gen date = dofm(mofd(datadate) - _lag)
format date %td
drop datadate _lag dup

/* identical gvkey-fyear-month rows are true duplicates: keep one */
bysort gvkey fyear date: gen dup = cond(_N==1,0,_n)
tab dup /*these are true dups, so drop extras*/
drop if dup>1
drop dup

/* a month can fall in more than one fiscal year */
bysort gvkey date: gen dup = cond(_N==1,0,_n)
tab dup /*cases where a mo can count as two different fyears*/
drop dup

/* Order ties by fyear so the result does not depend on how sort happens to
   arrange tied rows: the kept row carries the smallest fyear, and fyearalt
   carries the largest fyear when the month maps to more than one (missing
   otherwise). */
bysort gvkey date (fyear): gen fyearalt = fyear[_N] if _N>1
by gvkey date: keep if _n==1

destring gvkey,replace
tempfile months /*file has gvkey fyear date fyearalt*/
save `months' /*will come back to this, use for both facstart and facend*/

/* Load the Dealscan link sheet and keep only rows whose gvkey appears in the
   supplier list saved in `sg'. */
clear
import excel using "DS_Links2018.xlsx", firstrow case(lower) sheet(link_data)
keep if year(facstartdate) <= 2016   /* facilities starting after 2016 (or with no start date) fall outside the sample */
merge m:1 gvkey using `sg', keep(match) nogenerate

rename facid facilityid

/* linkflag starts as discrep_bcoid_flag and is set to 1 when both
   discrep_comp and discrep_tick are 0; the remaining link diagnostics are
   dropped afterwards */
rename discrep_bcoid_flag linkflag
replace linkflag = 1 if (discrep_comp==0) & (discrep_tick==0)
drop company coname_h fic score* smbl* ticker discrep*

sort facilityid
tempfile inprog
save `inprog'

/* Facility file: keep facilities whose start year (first four characters of
   the string facilitystartdate) is 2016 or earlier. */
clear
use 00_facility
tempvar styr
gen `styr' = substr(facilitystartdate,1,4)
destring `styr', replace
drop if `styr' > 2016                /* missing start years are dropped as well */
drop `styr'
tempfile fac
save `fac'

/* Package file: one borrower id per package, attached to every facility in
   that package; then keep only facility-borrower pairs present in the link
   data saved in `inprog'. */
clear
use 00_package
keep packageid borrowercompanyid
bysort packageid: keep if _n == 1
merge 1:m packageid using `fac', keep(match) nogenerate
rename borrowercompanyid bcoid
merge 1:1 facilityid bcoid using `inprog', keep(match) nogenerate

/* Convert the string facility start and end dates into Stata daily dates.
   Characters 1-4 are read as the year, 6-7 as the month and 9-10 as the day.
   Each converted variable replaces the string variable of the same name. */
foreach dv in facilitystartdate facilityenddate {
	gen _year = substr(`dv',1,4)
	gen _mo   = substr(`dv',6,2)
	gen _day  = substr(`dv',9,2)
	destring _year _mo _day, replace
	gen altst = mdy(_mo,_day,_year)
	format altst %td
	drop `dv' _year _mo _day
	rename altst `dv'
}

/* Compare the link-file start date with the facility-file start date:
   flag rows where they differ and keep the earlier of the two. */
count if facstartdate != facilitystartdate /*175*/
replace linkflag = 1 if facstartdate != facilitystartdate
order facstartdate facilitystartdate
replace facstartdate = min(facstartdate, facilitystartdate) /*min() ignores a missing argument*/

/*drop unnecessary vars*/
drop bcoid facilitystartdate loantype secured

/* move start and end dates to the first day of their month so they line up
   with the month file built from the fiscal year-end data */
gen facstart = dofm(mofd(facstartdate))
format facstart %td
gen facend = dofm(mofd(facilityenddate))
format facend %td
drop facstartdate facilityenddate

/* Attach the fiscal year(s) of the facility start month. The month file is
   keyed on gvkey and date, so facstart is temporarily named date. */
rename facstart date
merge m:1 gvkey date using `months', keep(master match)
/* no matching month for this gvkey: fall back to the calendar year */
replace fyear = year(date) if _merge==1
drop _merge
rename (date fyear fyearalt) (facstart st_fyear st_fyearalt)

/* Fill in missing facend before the end-month merge.
   1) Use the maturity-implied end: start month plus maturity (added as a
      number of months), expressed as the first day of that month.
   2) If that is missing too, set facend = facstart so the facility is counted
      in a single year. */
gen matend = dofm(mofd(facstart) + maturity)
format matend %td
replace facend = matend if missing(facend)   /*first replace with mat-implied*/
replace facend = facstart if missing(facend) /*next replace with facstart*/
drop matend

/* Attach the fiscal year(s) of the facility end month, using the same steps as
   for the start month: merge on gvkey and date, and fall back to the calendar
   year when the month file has no match. */
ren facend date
merge m:1 gvkey date using `months'
replace fyear = year(date) if _merge==1
drop if _merge==2
drop _merge
ren date facend
ren fyear end_fyear
ren fyearalt end_fyearalt

/* first and last fiscal year covered by the facility; min()/max() ignore a
   missing alternate year */
gen yr1 = min(st_fyear ,st_fyearalt)
drop if yr1>2016 /*also drops rows with missing yr1, since missing sorts above every number*/
gen yrN = max(end_fyear,end_fyearalt)

drop st_fyear* end_fyear*
gen numyrs = yrN-yr1+1

/* One row per year from yr1 to yrN. The year counter is keyed on a row id
   taken before expand rather than on facilityid, so it stays correct even if a
   facilityid appears on more than one row. expand keeps a single copy when
   numyrs is missing or below 1, and that copy gets year = yr1. */
tempvar row
gen long `row' = _n
expand numyrs
bysort `row': gen year = yr1 + _n - 1
drop `row'
sort facilityid year

gen hasloan=1
drop currency yr1 yrN facstart facend maturity
capture drop _merge
tempfile getback
save `getback'

/* Save one row per supplier gvkey that ever has a facility in the Dealscan
   data, together with the earliest facility-year for that gvkey. */
keep gvkey year
bysort gvkey (year): keep if _n == 1   /* lowest year within gvkey */
rename (year gvkey) (firstfacyr s_gvkey)
gen everds = 1
save 06_everDS,replace

/* Merge the contract-condition data (07_Kdata, keyed on packageid) onto the
   facility-year file. */
clear
use `getback'
merge m:1 packageid using 07_Kdata /*data with contract conditions*/

/* 07b_kdata_pkg is written with the same contents as before: every row of the
   merge result, without _merge. */
preserve
drop _merge
save 07b_kdata_pkg,replace /*need this for the defaults at package outset in Do4*/
restore

/* Packages found only in 07_Kdata have no facility-year row in `getback'.
   Drop them from the data in memory so they cannot enter the gvkey-year
   aggregation that follows. */
drop if _merge==2
drop _merge

/* valid becomes a gvkey-year flag: sum valid over all rows of the gvkey-year,
   then cap the sum at 1. */
tempvar nvalid
bysort gvkey year: egen `nvalid' = total(valid)
replace valid = `nvalid'
replace valid = 1 if valid > 1 & valid < .
drop `nvalid'

/*drop some unnecessary vars*/
drop conc* kcon_overexceptions
capture drop hasak

/* Keep one row per gvkey-package-year so a package with several facilities is
   counted once, then sum kcon across the packages of each gvkey-year. */
bysort gvkey packageid year: keep if _n == 1
bysort gvkey year: egen hasak = total(kcon)
order gvkey year packageid hasak
tab hasak

/* Indicator variables: set each to 1 for the whole gvkey-year when its
   gvkey-year total is positive (otherwise it takes the total itself), and to
   missing when the gvkey-year has hasak == 0. */
foreach k of varlist kcon kcon_elig kcon_solvent kcon_qual kcon_nongov kcon_loc kcon_dollar kcon_hasov kcon_hasconc kcon_hasconcexcept kcon_credlimit {
	tempvar ksum
	bysort gvkey year: egen `ksum' = total(`k')
	replace `k' = cond(`ksum' > 0, 1, `ksum')
	replace `k' = . if hasak == 0
	drop `ksum'
}

/* Continuous variables: replace with the gvkey-year mean; a mean of exactly 0
   is recorded as missing. */
foreach k of varlist kcon_bbase kcon_doi kcon_dod kcon_concperc kcon_concdollar {
	tempvar kavg
	bysort gvkey year: egen `kavg' = mean(`k')
	replace `k' = `kavg'
	replace `k' = . if `k' == 0
	drop `kavg'
}

/* Collapse to one row per gvkey-year. The variables rewritten above are
   constant within gvkey-year at this point; any other variable keeps the value
   from whichever row is retained. */
bysort gvkey year: keep if _n == 1

drop packageid facilityid numyrs hasak v1
rename gvkey s_gvkey

save 08_supplierDS,replace


